Get the data

To run this code, first set your credentials in the environment variables SB_USER and SB_PASS. Alternatively, change the data-loading calls below to use the StatsBomb free data!

# Fetch the match list for the configured competition and season.
# Credentials are read from the SB_USER / SB_PASS environment variables.
matches <- StatsBombR::get.matches(
  Sys.getenv('SB_USER'),
  Sys.getenv('SB_PASS'),
  season_id = params$season_id,
  competition_id = params$comp_id
)

# Display name of the selected team, taken from the first listed match in
# which it is the home side.
team_name <- matches[matches$home_team.home_team_id == params$team_id,]$home_team.home_team_name[1]

# slice to the last n games by the selected team
match_ids <- matches |>
  dplyr::filter(
    home_team.home_team_id == params$team_id | away_team.away_team_id == params$team_id
  ) |>
  dplyr::mutate(match_date = as.Date(match_date)) |>
  dplyr::arrange(dplyr::desc(match_date)) |>
  # slice_head() instead of slice(1:n): `1:0` evaluates to c(1, 0), so
  # n_games = 0 would otherwise still pick up the most recent match.
  dplyr::slice_head(n = params$n_games) |>
  dplyr::pull(match_id)

# pull out the match events for all the games
match_events <- StatsBombR::allevents(
  Sys.getenv('SB_USER'),
  Sys.getenv('SB_PASS'),
  matches = match_ids
) |>
    StatsBombR::allclean()
## Time difference of 5.748711 secs

Goalkicks

# Goal kicks taken by the selected team, reduced to start/end coordinates and
# a completion flag for plotting.
goalkicks <- match_events |>
    dplyr::filter(pass.type.name == 'Goal Kick', team.id == params$team_id) |>
    dplyr::mutate(
        # A missing pass outcome marks a completed pass (the same convention
        # used for `complete_passes` in the pressure section), so a goal kick
        # is successful when pass.outcome.id is NA.
        success = is.na(pass.outcome.id)
    ) |>
    dplyr::select(
        x = location.x,
        y = location.y,
        end.x = pass.end_location.x,
        end.y = pass.end_location.y,
        success
    )

# Draw the goal kick chart and title it with the team and sample size.
create_goalkick_chart(goalkicks, colour = params$colour) +
    ggplot2::ggtitle(
        label = paste0(team_name, ' Goalkick Zones'),
        subtitle = paste0(
            'Last ', params$n_games, ' Games'
        )
    )

Team Passing In First Third

# Number every pass within its possession, then keep only possessions whose
# first pass started at x <= 40 (the first third). None of this depends on the
# loop variable, so it is computed once instead of on every iteration.
first_third_passes <- match_events |>
    dplyr::filter(type.name == 'Pass') |>
    dplyr::group_by(match_id, possession) |>
    dplyr::mutate(
        action_number = dplyr::row_number()
    ) |>
    # location.x[1] is the start of the possession's first pass, so this keeps
    # or drops whole possessions rather than individual passes
    dplyr::filter(
        location.x[1] <= 40
    ) |>
    dplyr::ungroup() |>
    dplyr::filter(
        team.id == params$team_id,
        !is.na(pass.pass_cluster_id)
    )

# One cluster chart each for the 1st, 2nd and 3rd pass of those possessions.
for(pass.num in 1:3) {
    passes <- first_third_passes |>
        dplyr::filter(action_number == pass.num) |>
        dplyr::select(
            x = location.x,
            y = location.y,
            end.x = pass.end_location.x,
            end.y = pass.end_location.y,
            cluster = pass.pass_cluster_id
        )

    plot <- create_pass_clusters_chart(passes, colour = params$colour) +
        ggplot2::ggtitle(
            label = paste0(
                team_name,
                ' First Third ',
                toOrdinal::toOrdinal(pass.num),
                ' Passes'
            ),
            subtitle = paste0(
                'Last ', params$n_games, ' Games'
            )
        )

    # explicit print() is required for plots created inside a for loop
    print(plot)
}

Midfield Passes

# Clustered passes by the selected team, restricted to players whose
# position.id is one of the ids this section treats as midfield.
midfield_position_ids <- c(9, 10, 11, 13, 14, 15)

passes <- match_events |>
    dplyr::filter(
        team.id == params$team_id,
        type.name == 'Pass',
        position.id %in% midfield_position_ids,
        !is.na(pass.pass_cluster_id)
    ) |>
    dplyr::select(
        player.name,
        x = location.x,
        y = location.y,
        end.x = pass.end_location.x,
        end.y = pass.end_location.y,
        cluster = pass.pass_cluster_id
    )

# One chart per player, showing that player's passes in 5 clusters.
for(player in unique(passes$player.name)) {
    player_passes <- dplyr::filter(passes, player.name == player)

    chart_title <- ggplot2::ggtitle(
        label = paste0(player, ' Most Common Passes'),
        subtitle = paste0('Last ', params$n_games, ' Games')
    )

    plot <- create_pass_clusters_chart(
        player_passes,
        n.clust = 5,
        colour = params$colour
    ) +
        chart_title

    print(plot)
}

Receiving On The Turn

# Build one short event sequence per ball receipt: the 'Ball Receipt*' row plus
# any 'Pass' rows among the next two events, kept only when at least one pass
# is present. The result stays grouped by
# (match_id, reception_team.id, reception_number) for the steps that follow.
# Assumes match_events is in chronological order within each match - TODO confirm.
combinations <- match_events |>
    dplyr::group_by(match_id) |>
    dplyr::mutate(
        # team.id on 'Ball Receipt*' rows, NA on every other row
        reception_team.id = dplyr::if_else(type.name == 'Ball Receipt*', team.id, NA)
    ) |>
    # carry the receiving team down to the rows that follow, within each match
    tidyr::fill(reception_team.id, .direction = 'down') |>
    dplyr::mutate(
        # rows before the first receipt of a match have nothing to inherit; tag them 0
        reception_team.id = dplyr::if_else(is.na(reception_team.id), 0, reception_team.id)
    ) |> 
    dplyr::group_by(match_id, reception_team.id) |> 
    # running count of receipts per tagged team and match; rows tagged 0 contain
    # no receipt, so they stay at 0 and are removed by the filter below
    dplyr::mutate(reception_number = cumsum(type.name == 'Ball Receipt*')) |> 
    dplyr::filter(reception_number > 0) |> 
    dplyr::group_by(match_id, reception_team.id, reception_number) |> 
    # at most the first three rows of each receipt's group
    dplyr::slice(1:3) |> 
    dplyr::filter(
        # keep a group only if it contains a pass, then drop every row that is
        # neither a pass nor a receipt
        any(type.name == 'Pass'), type.name %in% c('Pass', 'Ball Receipt*')
    )

# First pass of every reception sequence, with the player who played it.
first_passes <- combinations |>
    dplyr::filter(type.name == 'Pass') |>
    dplyr::slice(1) |>
    dplyr::ungroup() |>
    dplyr::select(match_id, reception_team.id, reception_number, following_pass.id = id, player.id)

# One row per reception that the same player followed with a pass, enriched
# with the geometry of the pass that was received (previous_*) and of the pass
# that followed (following_*). Restricted to the selected team and to
# receptions where |previous_pass.angle| <= pi / 2.
receptions <- combinations |>
    # first row of each sequence, expected to be the 'Ball Receipt*' event
    dplyr::slice(1) |>
    dplyr::select(
        match_id,
        reception_team.id,
        reception_number,
        location.x,
        location.y,
        team.id,
        player.id,
        player.name,
        related_events
    ) |>
    dplyr::mutate(
        # vapply() instead of sapply(): always yields a character vector, even
        # for empty input, so the join key below keeps a stable type.
        # Assumes the first related event of a receipt is the pass received
        # and that event ids are character - TODO confirm.
        previous_pass.id = vapply(related_events, \(x) x[[1]], character(1))
    ) |>
    dplyr::select(-related_events) |>
    # joining on player.id as well keeps only passes played by the receiver
    dplyr::left_join(
        first_passes, by = c('match_id', 'reception_team.id', 'reception_number', 'player.id')
    ) |>
    dplyr::filter(!is.na(following_pass.id)) |>
    dplyr::left_join(
        dplyr::select(
            match_events,
            previous_pass.id = id,
            previous_location.x = location.x,
            previous_location.y = location.y,
            previous_pass.angle = pass.angle
        ),
        by = 'previous_pass.id'
    ) |>
    dplyr::left_join(
        dplyr::select(
            match_events,
            following_pass.id = id,
            following_location.x = location.x,
            following_location.y = location.y,
            following_pass.end_location.x = pass.end_location.x,
            following_pass.end_location.y = pass.end_location.y,
            following_pass.angle = pass.angle
        ),
        by = 'following_pass.id'
    ) |>
    dplyr::filter(team.id == params$team_id, abs(previous_pass.angle) <= pi / 2) |>
    dplyr::mutate(
        # Angle between the received and the following pass, in [-pi, pi).
        # NOTE(review): with this offset an unchanged direction (difference 0)
        # maps to -pi and a full reversal (difference pi) maps to 0; confirm
        # this is the convention create_passing_sonars() expects.
        change_in_angle = (following_pass.angle - previous_pass.angle) %% (2 * pi) - pi
    )
# Receptions made by the configured player, drawn as passing sonars.
player <- dplyr::filter(receptions, player.id == params$player_id)

create_passing_sonars(
    player,
    nbins = 20,
    colour = params$colour
)

Pressure Bar Charts

# Passes played by the selected team while flagged as under pressure.
under_pressure <- dplyr::filter(
    match_events,
    type.name == 'Pass',
    team.id == params$team_id,
    under_pressure == TRUE
)

# Per player: pressured passes attempted, the sum of their success
# probabilities, and the number completed (a missing outcome counts as
# complete). Players with fewer than 20 pressured passes are dropped, and the
# completed-minus-expected difference is expressed per 10 passes.
pressured_by_player <- dplyr::group_by(under_pressure, player.id, player.name)

passing_under_pressure <- pressured_by_player |>
    dplyr::summarise(
        total_passes = dplyr::n(),
        expected_passes = sum(pass.pass_success_probability, na.rm = TRUE),
        complete_passes = sum(is.na(pass.outcome.id))
    ) |>
    dplyr::mutate(
        average_pass_over_expectation = 10 * (complete_passes - expected_passes) / total_passes
    ) |>
    dplyr::filter(total_passes >= 20)
## `summarise()` has grouped output by 'player.id'. You can override using the
## `.groups` argument.
# Horizontal bar chart of passes completed above expectation under pressure,
# one bar per player, ordered by value and coloured on a red-grey-green scale
# centred on zero.
ggplot2::ggplot(passing_under_pressure) +
    # geom_col() is the idiomatic form of geom_bar(stat = 'identity')
    ggplot2::geom_col(
        ggplot2::aes(
            y = reorder(player.name, average_pass_over_expectation),
            x = average_pass_over_expectation,
            fill = average_pass_over_expectation
        ),
        colour = "#18191A",
        alpha = 0.9
    ) +
    ggplot2::scale_fill_gradient2(
        guide = NULL,
        midpoint = 0,
        low = "#b41313",
        mid = "#a1a1a1",
        high = "#007a3e"
    ) +
    ggplot2::xlab('Passes Completed Above Expected') +
    theme_tutorial() +
    # Set axis text and the x title explicitly on top of theme_tutorial().
    # No trailing comma: an empty final argument is not accepted everywhere.
    ggplot2::theme(
        axis.text.y = ggplot2::element_text(hjust = 1),
        axis.text.x = ggplot2::element_text(),
        axis.title.x = ggplot2::element_text()
    )

Gamestate

# Events annotated with game state: the first element of what
# StatsBombR::get.gamestate() returns.
gamestate_events <- StatsBombR::get.gamestate(match_events)[[1]]

# Mean pass success probability and mean pass length per game state, for the
# selected team only.
gamestates <- gamestate_events |>
    dplyr::group_by(GameState, team.id) |>
    dplyr::summarise(
        average_expected_passes = mean(pass.pass_success_probability, na.rm = TRUE),
        average_pass_length = mean(pass.length, na.rm = TRUE)
    ) |>
    dplyr::filter(team.id == params$team_id)
## Joining with `by = join_by(match_id)`
## Joining with `by = join_by(match_id)`
## Joining with `by = join_by(team.id, team.name, match_id)`
## Joining with `by = join_by(match_id, GameState)`
## Joining with `by = join_by(match_id, team.name)`
## Joining with `by = join_by(match_id, team.name, GameState)`
## `summarise()` has grouped output by 'GameState'. You can override using the `.groups` argument.
# Display label -> summary column, for the metrics shown in the scatter.
metrics <- c(
    'Average Expected Pass' = 'average_expected_passes',
    'Average Pass Length' = 'average_pass_length'
)

create_gamestate_scatter(gamestates, metrics = metrics)